/*    Copyright 2012 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.ArrayList;
import java.util.List;

import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.GeneralizedString;
import mosdi.fa.GeneralizedStringsNFA;
import mosdi.fa.IIDTextModel;
import mosdi.fa.NFA;
import mosdi.fa.NodeLimitExceededException;
import mosdi.paa.PAA;
import mosdi.paa.TextBasedPAA;
import mosdi.paa.apps.MatchCountDAA;
import mosdi.util.Alphabet;
import mosdi.util.Log;

public class SeedStatsSubcommand extends Subcommand {

	@Override
	public String usage() {
		return
		super.usage()+" [options] <seed>\n" +
		"\n" +
		"  Assumes an ungapped i.i.d. homology model, i.e. an i.i.d. text model\n" +
		"  over {0,1}, where 0 stands for a mismatch and 1 stands for a match.\n" +
		"\n" +
		"  A seed is a sequence over {1,*}, where * means \"don't care\"." +
		"\n" +
		"  Computes the distribution of the number of random hits of the given seed.\n" +
		"\n" +
		"Options:\n" +
		"  -p <prob>: homology level (probability of a match, default:0.95)\n" +
		"  -l <length>: length of random sequence to consider (default: 64)\n" +
		"  -k <maxhits>: compute distribution up to <maxhits> (default: 1)";
	}

	@Override
	public String description() {
		return "Calculates the sensitivty (or whole hit count distribution) for a given alignment seed.";
	}

	@Override
	public String name() {
		return "seed-stat";
	}

	@Override
	public int run(String[] args) {
		parseOptions(args, 1, "p:l:k:");

		// Option dependencies
		// NONE

		// Mandatory arguments
		String seed = getStringArgument(0);

		// Options
		double matchProbability = this.getRangedDoubleOption("p", 0.0, 1.0, 0.95);
		int length = getPositiveIntOption("l", 64);
		int maxHits = getPositiveIntOption("k", 1);

		Alphabet alphabet = new Alphabet("01");
		GeneralizedString seedGenString = new GeneralizedString(alphabet, seed, '*');

		double[] characterDistribution = {1.0-matchProbability, matchProbability};
		FiniteMemoryTextModel textModel = new IIDTextModel(alphabet.size(), characterDistribution); 
		// construct automaton
		CDFA cdfa = null;
		List<GeneralizedString> genStringList = new ArrayList<GeneralizedString>();
		genStringList.add(seedGenString);
		int states = -1;
		int statesMinimal = -1;
		Log.startTimer();
		try {
			NFA nfa;
			nfa = new GeneralizedStringsNFA(genStringList); 
			cdfa = DFAFactory.build(alphabet, nfa, 50000);
			states = cdfa.getStateCount();
			Log.printf(Log.Level.VERBOSE, "DFA states: \"%d\"%n", states);
			cdfa = cdfa.minimizeHopcroft();
			statesMinimal = cdfa.getStateCount();
			Log.printf(Log.Level.VERBOSE, "DFA states (after minimization): \"%d\"%n", statesMinimal);
		} catch (NodeLimitExceededException e) {
			Log.println(Log.Level.STANDARD, "Skipping: node limit exceeded (switch -N)");
			return 1;
		}
		if (Log.levelAtLeast(Log.Level.DEBUG)) {
			Log.print(Log.Level.DEBUG, cdfa.toString());
		}
		// resulting distribution of matches we are interested in
		double[] matchDistribution = null;
		MatchCountDAA daa = new MatchCountDAA(cdfa, maxHits);
		PAA paa = new TextBasedPAA(daa, textModel);
		double[][] stateValueDistribution = paa.computeStateValueDistribution(length);
		matchDistribution = paa.toValueDistribution(stateValueDistribution);
		Log.stopTimer("Computing PAA hit count distribution");
		double time = Log.getLastPeriodCpu();
		Log.println(Log.Level.STANDARD, "Format: >>seed>> seed >>runtime>> runtime >>hit-count-dist>> hit-count-distribution ");
		StringBuilder sb = new StringBuilder();
		sb.append(String.format(">>seed>> %s ", seed));
		sb.append(Log.format(">>runtime>> %t ", time));
		sb.append(">>dist>> ");
		for (int i=0; i<=maxHits; ++i) {
			sb.append(String.format("%e ", matchDistribution[i]));
		}
		Log.println(Log.Level.STANDARD, sb.toString());
		return 0;
	}
}


